# Mark Nieman and Maxwell Allamong
# School of Thought - Data Analysis for Appendix
# November 7, 2022
sink("NiemanAllamong_SoT_Appendix.txt")
rm(list = ls())

#Load Packages
library(tidyverse)
library(magrittr)
library(stargazer)
library(sandwich)
library(ggplot2)
library(ggrepel)

# Part 1: Data Construction

# Figure A.1: Heat Map of Leader Education Data Availability, by Region and Decade of Leader Tenure Onset.
SoT_data<-read_csv("SoT_data.csv") 

leaders.infoavail <- SoT_data %>%
  mutate(info.avail = ifelse(!is.na(uni.countryabb), 1, 0)) %>%
  group_by(region,decade) %>%
  summarize(mean = mean(info.avail)) 

infoavail.heatmap <- ggplot(leaders.infoavail, aes(region, decade)) +
  geom_tile(aes(fill = mean)) +
  labs(x = "Region", y = "Decade in which leader tenure started") +
  scale_fill_gradient(name = "Proportion For Whom \nEducational History is \nAvailable", low = "white", high = "black") +
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 14))
pdf(height = 8, width = 11, "infoavail_heatmap.pdf")
infoavail.heatmap
dev.off()



# Part 2: Correlation of Dependent Variables

# Table A.1: Correlation Matrix of Dependent Variables
DVs<- SoT_data %>% select(change.trade.liberal, change.LJI,change.kaopen,change.hrscore,change.libdem)
round(cor(DVs,use = "na.or.complete"), 3)



# Part 3: Military Schools
# Load and organize military data into Anglo-American, Other Western, and Hierarchical
SoT_mil <- SoT_data   
SoT_mil$AngloAmer.edu[SoT_mil$uni.type == "Military" & (SoT_mil$uni.countryabb == "USA" | SoT_mil$uni.countryabb == "UKG" | SoT_mil$uni.countryabb == "AUL" | SoT_mil$uni.countryabb == "NEW" | SoT_mil$uni.countryabb == "CAN")]<-1
SoT_mil$euro.edu[SoT_mil$uni.type == "Military" & (SoT_mil$uni.countryabb == "AUS" | SoT_mil$uni.countryabb == "BEL" | SoT_mil$uni.countryabb == "DEN" |
                                                     SoT_mil$uni.countryabb == "FIN" | SoT_mil$uni.countryabb == "FRN" | 
                                                     SoT_mil$uni.countryabb == "GMY" | SoT_mil$uni.countryabb == "GRC" | 
                                                     SoT_mil$uni.countryabb == "IRE" | SoT_mil$uni.countryabb == "ITA" | 
                                                     SoT_mil$uni.countryabb == "NOR" | SoT_mil$uni.countryabb == "NTH" | 
                                                     SoT_mil$uni.countryabb == "POR" | SoT_mil$uni.countryabb == "SPN" | 
                                                     SoT_mil$uni.countryabb == "SWD" | SoT_mil$uni.countryabb == "SWZ")] <-1
SoT_mil$noneuro.edu[SoT_mil$uni.type == "Military" & SoT_mil$AngloAmer.edu==0 & SoT_mil$euro.edu==0]<-1
SoT_mil1<- SoT_mil %>% filter(start.trade.liberal == 0)


# Table A.2: Frequency of Anglo-American Military Schools.
mil_school<- SoT_mil %>% filter(AAmil.edu==1)
table(mil_school$university)

# Table A.3: Effect of Pooling Military and Education Types on Political Outcomes
trade.military <- glm(change.trade.liberal ~ AngloAmer.edu + euro.edu + noneuro.edu + 
                                log(rgdppc) + oil + xconstV + UK_col + us_ally + 
                                EthFrac + log(pop) + tenure, data = SoT_mil1, family = "binomial")
trade.military.se <- vcovCL(trade.military, type = "HC0", SoT_mil1$ccode)
trade.military.se <- sqrt(diag(trade.military.se))

rl.military <- lm(change.LJI ~ AngloAmer.edu + euro.edu + noneuro.edu +
                            log(rgdppc) + oil + UK_col + us_ally + 
                            EthFrac + log(pop) + tenure + start.LJI, data = SoT_mil)
rl.military.se <- vcovCL(rl.military, type = "HC1", SoT_mil$ccode)
rl.military.se <- sqrt(diag(rl.military.se)) 

ka.military <- lm(change.kaopen ~ AngloAmer.edu + euro.edu + noneuro.edu +
                            log(rgdppc) + oil + xconstV + UK_col + us_ally + 
                            EthFrac + log(pop) + tenure + start.kaopen, data = SoT_mil)
ka.military.se <- vcovCL(ka.military, type = "HC1", SoT_mil$ccode)
ka.military.se <- sqrt(diag(ka.military.se))

hr.military <- lm(change.hrscore ~ AngloAmer.edu + euro.edu + noneuro.edu +
                            log(rgdppc) + oil + xconstV + UK_col + us_ally + 
                            EthFrac + log(pop) + tenure + start.hrscore, data = SoT_mil)
hr.military.se <- vcovCL(hr.military, type = "HC1", SoT_mil$ccode)
hr.military.se <- sqrt(diag(hr.military.se))

ld.military <- lm(change.libdem ~ AngloAmer.edu + euro.edu + noneuro.edu + 
                            log(rgdppc) + oil +  UK_col + us_ally + 
                            EthFrac + log(pop) + tenure + start.libdem, data = SoT_mil)
ld.military.se <- vcovCL(ld.military, type = "HC1", SoT_mil$ccode)
ld.military.se <- sqrt(diag(ld.military.se))

stargazer(trade.military, rl.military, ka.military, hr.military, ld.military, digits = 3,
          se = list(trade.military.se,rl.military.se,ka.military.se,hr.military.se,ld.military.se),
          intercept.bottom = T, dep.var.labels = c("Trade Liberalization","Rule of Law","Financial Openness","Human Rights","Liberal Democracy"),
          covariate.labels = c("Anglo-American Education", "Other Western Education", "Hierarchical Education",
                               "Economic Development","Oil Producer","Executive Constraints","Former British Colony", "US Ally",
                               "Ethnic Fractionalization","Population","Time in Office","DV Entering Office"),
          keep.stat = c("n","ll","rsq"),align=T,no.space = T,
          star.char = c("+","*", "**"),
          star.cutoffs = c(0.2, 0.1, 0.05),
          notes = c("$^{**}$p$<$0.05$, ^{*}$p$<$0.1, two-tailed; $^{+}$p$<$0.1, one-tailed;. Standard errors clustered by country."), 
          notes.append = F)



# Part 4: Addressing Potential Endogeneity

  # Part 4(a): Self-Selection by Liberal-inclined Leaders

  # Parent Background from LEAD data set
  SoT_LEAD<-read_csv("SoT_LEAD.csv")   
  
  # Table A.4: Educational Background of Leaders from Elite and Non-elite Families
    # Attend Anglo-American University
    table(SoT_LEAD$AngloAmer.edu,SoT_LEAD$parent_elite)
    t.test(AngloAmer.edu~parent_elite,data=SoT_LEAD)
    # Attend Other Western University
    table(SoT_LEAD$euro.edu,SoT_LEAD$parent_elite)
    t.test(euro.edu~parent_elite,data=SoT_LEAD)
    # Attend Hierarchical (Non-Western) University
    table(SoT_LEAD$noneuro.edu,SoT_LEAD$parent_elite)
    t.test(noneuro.edu~parent_elite,data=SoT_LEAD)
    # No University Education
    SoT_LEAD$nouni.edu<-SoT_LEAD$none.edu+SoT_LEAD$mil.edu
    table(SoT_LEAD$nouni.edu,SoT_LEAD$parent_elite)
    t.test(nouni.edu~parent_elite,data=SoT_LEAD)
  
  
  # Table A.5: Tertiary Educational Model and Political Outcomes, Subset of Politically Elite Families
  SoT_LEAD1<- SoT_LEAD %>% filter(parent_elite == 1)
  SoT_LEAD2<- SoT_LEAD1 %>% filter(start.trade.liberal == 0)
  trade.elite <- glm(change.trade.liberal ~ AngloAmer.edu + 
                               log(rgdppc) + oil + xconstV + UK_col + us_ally + 
                               EthFrac + log(pop) + tenure, data = SoT_LEAD2, family = "binomial")
  trade.elite.se <- vcovCL(trade.elite, type = "HC0", SoT_LEAD2$ccode)
  trade.elite.se <- sqrt(diag(trade.elite.se))
  
  rl.elite <- lm(change.LJI ~ AngloAmer.edu + 
                           log(rgdppc) + oil +  UK_col + us_ally + 
                           EthFrac + log(pop) + tenure + start.LJI, data = SoT_LEAD1)
  rl.elite.se <- vcovCL(rl.elite, type = "HC1", SoT_LEAD1$ccode)
  rl.elite.se <- sqrt(diag(rl.elite.se)) 
  
  ka.elite <- lm(change.kaopen ~ AngloAmer.edu + 
                           log(rgdppc) + oil + xconstV + UK_col + us_ally + 
                           EthFrac + log(pop) + tenure + start.kaopen, data = SoT_LEAD1)
  ka.elite.se <- vcovCL(ka.elite, type = "HC1", SoT_LEAD1$ccode)
  ka.elite.se <- sqrt(diag(ka.elite.se))
  
  hr.elite <- lm(change.hrscore ~ AngloAmer.edu + 
                           log(rgdppc) + oil + xconstV + UK_col + us_ally + 
                           EthFrac + log(pop) + tenure + start.hrscore, data = SoT_LEAD1)
  hr.elite.se <- vcovCL(hr.elite, type = "HC1", SoT_LEAD1$ccode)
  hr.elite.se <- sqrt(diag(hr.elite.se))
  
  ld.elite <- lm(change.libdem ~ AngloAmer.edu + 
                           log(rgdppc) + oil +  UK_col + us_ally + 
                           EthFrac + log(pop) + tenure + start.libdem, data = SoT_LEAD1)
  ld.elite.se <- vcovCL(ld.elite, type = "HC1", SoT_LEAD1$ccode)
  ld.elite.se <- sqrt(diag(ld.elite.se))
  
  stargazer(trade.elite, rl.elite, ka.elite, hr.elite, ld.elite, digits = 3,
            se = list(trade.elite.se,rl.elite.se,ka.elite.se,hr.elite.se,ld.elite.se),
            intercept.bottom = T, dep.var.labels = c("Trade Liberalization","Rule of Law","Financial Openness","Human Rights","Liberal Democracy"),
            covariate.labels = c("Anglo-American Education",  
                                 "Economic Development","Oil Producer","Executive Constraints","Former British Colony", "US Ally",
                                 "Ethnic Fractionalization","Population","Time in Office","DV Entering Office"),
            keep.stat = c("n","ll","rsq"),align=T,no.space = T,
            star.char = c("+","*", "**"),
            star.cutoffs = c(0.2, 0.1, 0.05),
            notes = c("$^{**}$p$<$0.05$, ^{*}$p$<$0.1, two-tailed; $^{+}$p$<$0.1, one-tailed;. Standard errors clustered by country."),  
            notes.append = F)
  
  # Table A.6: Tertiary Educational Model and Political Outcomes, Matched Sample
  match<-read_csv("SoT_match.csv") 
  match1<- match %>% filter(start.trade.liberal == 0)
  
  trade.match <- glm(change.trade.liberal ~ AngloAmer.edu +
                               log(rgdppc) + oil + xconstV + UK_col + us_ally + 
                               EthFrac + log(pop) + tenure, data = match1, family = "binomial")
  trade.match.se <- vcovCL(trade.match, type = "HC0", match1$ccode)
  trade.match.se <- sqrt(diag(trade.match.se))
  
  rl.match <- lm(change.LJI ~ AngloAmer.edu +
                           log(rgdppc) + oil + UK_col + us_ally + 
                           EthFrac + log(pop) + tenure + start.LJI, data = match)
  rl.match.se <- vcovCL(rl.match, type = "HC1", match$ccode)
  rl.match.se <- sqrt(diag(rl.match.se)) 
  
  ka.match <- lm(change.kaopen ~ AngloAmer.edu +
                           log(rgdppc) + oil + xconstV + UK_col + us_ally + 
                           EthFrac + log(pop) + tenure + start.kaopen, data = match)
  ka.match.se <- vcovCL(ka.match, type = "HC1", match$ccode)
  ka.match.se <- sqrt(diag(ka.match.se))
  
  hr.match <- lm(change.hrscore ~ AngloAmer.edu +
                           log(rgdppc) + oil + xconstV + UK_col + us_ally + 
                           EthFrac + log(pop) + tenure + start.hrscore, data = match)
  hr.match.se <- vcovCL(hr.match, type = "HC1", match$ccode)
  hr.match.se <- sqrt(diag(hr.match.se))
  
  ld.match <- lm(change.libdem ~ AngloAmer.edu +
                           log(rgdppc) + oil +  UK_col + us_ally + 
                           EthFrac + log(pop) + tenure + start.libdem, data = match)
  ld.match.se <- vcovCL(ld.match, type = "HC1", match$ccode)
  ld.match.se <- sqrt(diag(ld.match.se))
  
  stargazer(trade.match, rl.match, ka.match, hr.match, ld.match, digits = 3,
            se = list(trade.match.se,rl.match.se,ka.match.se,hr.match.se,ld.match.se),
            intercept.bottom = T, dep.var.labels = c("Trade Liberalization","Rule of Law","Financial Openness","Human Rights","Liberal Democracy"),
            covariate.labels = c("Anglo-American Education",
                                 "Economic Development","Oil Producer","Executive Constraints","Former British Colony", "US Ally",
                                 "Ethnic Fractionalization","Population","Time in Office","DV Entering Office"),
            keep.stat = c("n","ll","rsq"),align=T,no.space = T,
            star.char = c("+","*", "**"),
            star.cutoffs = c(0.2, 0.1, 0.05),
            notes = c("$^{**}$p$<$0.05$, ^{*}$p$<$0.1, two-tailed; $^{+}$p$<$0.1, one-tailed;. Standard errors clustered by country."), 
            notes.append = F)

  # Part 4(b): Structural Conditions
    
  ## Set up data to include previous leader's change in policy measure (trade liberalization, rule of law, etc)
  lag<- SoT_data %>% group_by(ccode) %>% mutate(lag.trade = dplyr::lag(change.trade.liberal, n = 1, default = NA)) %>%
    mutate(lag.rl = dplyr::lag(change.LJI, n = 1, default = NA)) %>% 
    mutate(lag.ka = dplyr::lag(change.kaopen, n = 1, default = NA)) %>% 
    mutate(lag.hr = dplyr::lag(change.hrscore, n = 1, default = NA)) %>% 
    mutate(lag.ld = dplyr::lag(change.libdem, n = 1, default = NA)) %>% 
    select(X1,obsid,ccode,name,idacr,leader, change.LJI, lag.rl, change.kaopen, lag.ka, change.hrscore, lag.hr, change.libdem, lag.ld, everything())
  ## Divide dataset into democratic and autocratic subsamples
  SoT_Democ <- lag  %>% filter(start.libdem>.5)
  SoT_nonDemoc <- lag  %>% filter(start.libdem<.5)
  
  # Table A.7: The Effect of Prior Liberalizing on Next Leader's Education (DV is leader education)
  AA.elect <- glm(AngloAmer.edu ~ log(rgdppc) + oil  + UK_col + us_ally + EthFrac + log(pop) 
                          + lag.trade + lag.rl + lag.ka + lag.hr + lag.ld 
                          , data = SoT_Democ)
  AA.elect.se <- vcovCL(AA.elect, type = "HC0", SoT_Democ$ccode)
  AA.elect.se <- sqrt(diag(AA.elect.se)) 
  
  euro.elect <- glm(euro.edu ~ log(rgdppc) + oil  + UK_col + us_ally + EthFrac + log(pop) 
                            + lag.trade + lag.rl + lag.ka + lag.hr + lag.ld 
                            , data = SoT_Democ)
  euro.elect.se <- vcovCL(euro.elect, type = "HC0", SoT_Democ$ccode)
  euro.elect.se <- sqrt(diag(euro.elect.se)) 
  
  hier.elect <- glm(noneuro.edu ~ log(rgdppc) + oil  + UK_col + us_ally + EthFrac + log(pop) 
                            + lag.trade + lag.rl + lag.ka + lag.hr + lag.ld 
                            , data = SoT_Democ)
  hier.elect.se <- vcovCL(hier.elect, type = "HC0", SoT_Democ$ccode)
  hier.elect.se <- sqrt(diag(hier.elect.se))   
  
  AA.appoint <- glm(AngloAmer.edu ~ log(rgdppc) + oil  + UK_col + us_ally + EthFrac + log(pop) 
                            + lag.trade + lag.rl + lag.ka + lag.hr + lag.ld 
                            , data = SoT_nonDemoc)
  AA.appoint.se <- vcovCL(AA.appoint, type = "HC0", SoT_nonDemoc$ccode)
  AA.appoint.se <- sqrt(diag(AA.appoint.se)) 
  
  euro.appoint <- glm(euro.edu ~ log(rgdppc) + oil  + UK_col + us_ally + EthFrac + log(pop) 
                              + lag.trade + lag.rl + lag.ka + lag.hr + lag.ld 
                              , data = SoT_nonDemoc)
  euro.appoint.se <- vcovCL(euro.appoint, type = "HC0", SoT_nonDemoc$ccode)
  euro.appoint.se <- sqrt(diag(euro.appoint.se)) 
  
  hier.appoint <- glm(noneuro.edu ~ log(rgdppc) + oil  + UK_col + us_ally + EthFrac + log(pop) 
                              + lag.trade + lag.rl + lag.ka + lag.hr + lag.ld 
                              , data = SoT_nonDemoc)
  hier.appoint.se <- vcovCL(hier.appoint, type = "HC0", SoT_nonDemoc$ccode)
  hier.appoint.se <- sqrt(diag(hier.appoint.se))   
  
  stargazer(AA.elect, euro.elect, hier.elect, AA.appoint, euro.appoint, hier.appoint,  digits = 3,
            se = list(AA.elect.se, euro.elect.se, hier.elect.se, AA.appoint.se, euro.appoint.se, hier.appoint.se),
            intercept.bottom = T, dep.var.labels = c("AA Elected","Euro. Elected","Hier. Elected", "AA Selected", "Euro. Selected", "Hier. Selected"),
            covariate.labels = c("Economic Development","Oil Producer","Executive Constraints","Former British Colony", "US Ally",
                                 "Ethnic Fractionalization","Population", "Prior Leader's Change in Trade",
                                 "Prior Leader's Change in Financial Openness", "Prior Leader's Change in Human Rights",
                                 "Prior Leader's Change in Liberal Democracy"),
            keep.stat = c("n","ll","rsq"),align=T,no.space = T,
            star.char = c("*", "**"),
            star.cutoffs = c( 0.1, 0.05),
            notes = c("$^{**}$p$<$0.05$, ^{*}$p$<$0.1, two-tailed; $^{+}$p$<$0.1, one-tailed;. Standard errors clustered by country."), 
            notes.append = F)  
  
  # Table A.8: Tertiary Educational Model and Political Outcomes, Predecessor Died
  SoT_natdeath<-read_csv("SoT_natdeath.csv")
  
  trade.natdeath<-lm(change.imports.tottrade~AngloAmer.edu+start.imports.tottrade,data=SoT_natdeath)
  rl.natdeath<-lm(change.LJI~AngloAmer.edu+start.LJI,data=SoT_natdeath)
  ka.natdeath<-lm(change.kaopen~AngloAmer.edu+start.kaopen,data=SoT_natdeath)
  hr.natdeath<-lm(change.hrscore~AngloAmer.edu+start.hrscore,data=SoT_natdeath)
  ld.natdeath<-lm(change.libdem~AngloAmer.edu+start.libdem,data=SoT_natdeath)
  
  stargazer(trade.natdeath, rl.natdeath, ka.natdeath, hr.natdeath, ld.natdeath, digits = 3,
            se =NULL,
            intercept.bottom = T, dep.var.labels = c("Alt. Trade Measure" ,"Rule of Law","Financial Openness","Human Rights","Liberal Democracy"),
            keep.stat = c("n","rsq"),align=T,no.space = T,
            star.char = c("+","*", "**"),
            star.cutoffs = c(0.2, 0.1, 0.05),
            notes = c("$^{**}$p$<$0.05$, ^{*}$p$<$0.1, two-tailed; $^{+}$p$<$0.1, one-tailed."),  
            notes.append = F) 
  
  # Table A.9: Additional Tertiary Educational Models and Political Outcomes, Predecessor Died
  trade.natdeath2<-lm(change.imports.tottrade~AngloAmer.edu+euro.edu+noneuro.edu+start.imports.tottrade,data=SoT_natdeath)
  rl.natdeath<-lm(change.LJI~AngloAmer.edu+euro.edu+noneuro.edu+start.LJI,data=SoT_natdeath)
  ka.natdeath<-lm(change.kaopen~AngloAmer.edu+euro.edu+noneuro.edu+start.kaopen,data=SoT_natdeath)
  hr.natdeath<-lm(change.hrscore~AngloAmer.edu+euro.edu+noneuro.edu+start.hrscore,data=SoT_natdeath)
  ld.natdeath<-lm(change.libdem~AngloAmer.edu+euro.edu+noneuro.edu+start.libdem,data=SoT_natdeath)
  
  stargazer(trade.natdeath2, rl.natdeath, ka.natdeath, hr.natdeath, ld.natdeath, digits = 3,
            se =NULL,
            intercept.bottom = T, dep.var.labels = c("Alt. Trade Measure" ,"Rule of Law","Financial Openness","Human Rights","Liberal Democracy"),
            keep.stat = c("n","rsq"),align=T,no.space = T,
            star.char = c("+","*", "**"),
            star.cutoffs = c(0.2, 0.1, 0.05),
            notes = c("$^{**}$p$<$0.05$, ^{*}$p$<$0.1, two-tailed; $^{+}$p$<$0.1, one-tailed."),  
            notes.append = F) 
  
  
  # Table A.10: Educational Models and Political Outcomes, Instrumental Variables
  # See stata do.file


# Part 5: Case Fit and Outliers
  
  # Re-estimate main models
  SoT_data1<- SoT_data %>% filter(start.trade.liberal == 0)
  trade1 <- glm(change.trade.liberal ~ AngloAmer.edu + euro.edu + noneuro.edu  +
                  log(rgdppc) + oil + xconstV + UK_col + us_ally + 
                  EthFrac + log(pop) + tenure, data = SoT_data1, family = "binomial", na.action=na.exclude)
  
  trade2 <- glm(change.trade.liberal ~ AA.econ + AA.law + AA.socsci + AA.hum + AA.other + noAA.econ + noAA.law+ noAA.socsci + noAA.hum + noAA.other + AAmil.edu + noAAmil.edu +
                  log(rgdppc) + oil + xconstV + UK_col + us_ally + 
                  EthFrac + log(pop) + tenure, data = SoT_data1, family = "binomial", na.action=na.exclude)
  rl1 <- lm(change.LJI ~ AngloAmer.edu + euro.edu + noneuro.edu +
              log(rgdppc) + oil +  UK_col + us_ally + 
              EthFrac + log(pop) + tenure + start.LJI, data = SoT_data, na.action=na.exclude)
  rl2 <- lm(change.LJI ~ AA.econ + AA.law + AA.socsci + AA.hum + AA.other + noAA.econ + noAA.law+ noAA.socsci + noAA.hum + noAA.other + AAmil.edu + noAAmil.edu +
              log(rgdppc) + oil +  UK_col + us_ally + 
              EthFrac + log(pop) + tenure + start.LJI, data = SoT_data, na.action = na.exclude)
  ka1 <- lm(change.kaopen ~ AngloAmer.edu + euro.edu + noneuro.edu + 
              log(rgdppc) + oil + xconstV + UK_col + us_ally + 
              EthFrac + log(pop) + tenure + start.kaopen, data = SoT_data, na.action=na.exclude)
  ka2 <- lm(change.kaopen ~ AA.econ + AA.law + AA.socsci + AA.hum + AA.other + noAA.econ + noAA.law+ noAA.socsci + noAA.hum + noAA.other + AAmil.edu + noAAmil.edu +
              log(rgdppc) + oil + xconstV + UK_col + us_ally + 
              EthFrac + log(pop) + tenure + start.kaopen, data = SoT_data, na.action=na.exclude)
  hr1 <- lm(change.hrscore ~ AngloAmer.edu + euro.edu + noneuro.edu +
              log(rgdppc) + oil + xconstV + UK_col + us_ally + 
              EthFrac + log(pop) + tenure + start.hrscore, data = SoT_data, na.action=na.exclude)
  hr2 <- lm(change.hrscore ~ AA.econ + AA.law + AA.socsci + AA.hum + AA.other + noAA.econ + noAA.law+ noAA.socsci + noAA.hum + noAA.other + AAmil.edu + noAAmil.edu + 
              log(rgdppc) + oil + xconstV + UK_col + us_ally + 
              EthFrac + log(pop) + tenure + start.hrscore, data = SoT_data, na.action=na.exclude)
  ld1 <- lm(change.libdem ~ AngloAmer.edu + euro.edu + noneuro.edu + 
              log(rgdppc) + oil +  UK_col + us_ally + 
              EthFrac + log(pop) + tenure + start.libdem, data = SoT_data, na.action=na.exclude)
  ld2 <- lm(change.libdem ~ AA.econ + AA.law + AA.socsci + AA.hum + AA.other + noAA.econ + noAA.law+ noAA.socsci + noAA.hum + noAA.other + AAmil.edu + noAAmil.edu +
              log(rgdppc) + oil  + UK_col + us_ally + 
              EthFrac + log(pop) + tenure + start.libdem, data = SoT_data, na.action=na.exclude)
  
  # model fit
  
  # tertiary education and trade
  fit.trade<-cbind(SoT_data1, fit.trade=fitted(trade1), resid.trade=resid(trade1))
  fit.trade<-select(fit.trade, obsid, name, idacr, leader, university, uni.country, uni.type, fit.trade, resid.trade, 
                    change.trade.liberal, AngloAmer.edu, euro.edu, noneuro.edu, rgdppc, oil, xconstV, UK_col, us_ally, EthFrac, pop, tenure)
  fit.trade1<-fit.trade%>%filter(change.trade.liberal==1)
  fit.trade0<-fit.trade%>%filter(change.trade.liberal==0)
  mean(fit.trade1$fit.trade)
  mean(fit.trade0$fit.trade,na.rm = TRUE)

  # Figure A.2(a): Model Fit and Outliers for Estimates of Trade Liberalization (Education Model)
  fit.trade %>% arrange(desc(-resid.trade))  %>% mutate(change.trade.liberal=as.factor(change.trade.liberal),leader_country=paste(leader, idacr, sep=", "), 
                                                        leader_country_label1=ifelse(fit.trade>.43, leader_country,"")) %>%
    select(change.trade.liberal,fit.trade,leader_country_label1) %>%
    na.omit() %>%
    ggplot() +  
    geom_boxplot(aes(y=fit.trade,x=change.trade.liberal),width=.6) +
    geom_point(aes(x=as.factor(1),y=.5936)) + geom_point(aes(x=as.factor(1),y=.5125)) + geom_point(aes(x=as.factor(1),y=.46)) +
    stat_summary(aes(y=fit.trade,x=change.trade.liberal),fun=mean, geom="point", shape=15, size=2) +
    geom_text(aes(x=as.factor(0), y=c(.148), label=c("mean = .148")), nudge_x=.16, size=4) +
    geom_text(aes(x=as.factor(1), y=c(.239), label=c("mean = .239")), nudge_x=.16, size=4) +
    geom_text_repel(aes(x=change.trade.liberal,y=fit.trade, label=leader_country_label1), nudge_x=.12,  
                    force = 0.5, direction = "y", hjust = 0, segment.size = 0.1)  +
    scale_y_continuous("Probability of Trade Liberalization") + 
    scale_x_discrete("Observed Trade Liberalization", labels=c("Did Not Liberalize","Liberalized")) +
    theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
  ggsave("trade1.pdf")
  
    # Correctly classified
    trade.classify <-fit.trade %>% select(change.trade.liberal,fit.trade) %>% na.omit()
    trade.classify$correct[(trade.classify$fit.trade>mean(trade.classify$change.trade.liberal) & trade.classify$change.trade.liberal==1) | (trade.classify$fit.trade<mean(trade.classify$change.trade.liberal) & trade.classify$change.trade.liberal==0)]<-1
    trade.classify$correct[(trade.classify$fit.trade>mean(trade.classify$change.trade.liberal) & trade.classify$change.trade.liberal==0) | (trade.classify$fit.trade<mean(trade.classify$change.trade.liberal) & trade.classify$change.trade.liberal==1)]<-0
    summary(trade.classify$correct)
    table(trade.classify$correct)
  
  # specialization and trade
  fit.trade2<-cbind(SoT_data1, fit.trade=fitted(trade2), resid.trade=resid(trade2))
  fit.trade2<-select(fit.trade2, obsid, name, idacr, leader, university, uni.country, uni.type, fit.trade, resid.trade, 
                     change.trade.liberal, AA.econ, AA.law, AA.socsci, AA.hum, AA.other, noAA.econ, noAA.law, noAA.socsci, noAA.hum, noAA.other, AAmil.edu, noAAmil.edu, rgdppc, oil, xconstV, UK_col, us_ally, EthFrac, pop, tenure)
  fit.trade1_2<-fit.trade2%>%filter(change.trade.liberal==1)
  fit.trade0_2<-fit.trade2%>%filter(change.trade.liberal==0)
  mean(fit.trade1_2$fit.trade)
  mean(fit.trade0_2$fit.trade,na.rm = TRUE)
  
  # Figure A.2(b): Model Fit and Outliers for Estimates of Trade Liberalization (Subject Studied)
  fit.trade2 %>% arrange(desc(-resid.trade))  %>% mutate(change.trade.liberal=as.factor(change.trade.liberal),leader_country=paste(leader, idacr, sep=", "), 
                                                         leader_country_label1=ifelse((fit.trade>.4 & change.trade.liberal==0) | (fit.trade>.6 & change.trade.liberal==1), leader_country,"")) %>%
    select(change.trade.liberal,fit.trade,leader_country_label1) %>%
    na.omit() %>%
    ggplot() +  
    geom_boxplot(aes(y=fit.trade,x=change.trade.liberal),width=.6) +
    geom_point(aes(x=as.factor(1),y=.6267)) +
    stat_summary(aes(y=fit.trade,x=change.trade.liberal),fun=mean, geom="point", shape=15, size=2) +
    geom_text(aes(x=as.factor(0), y=c(.144), label=c("mean = .144")), nudge_x=.16, size=4) +
    geom_text(aes(x=as.factor(1), y=c(.259), label=c("mean = .259")), nudge_x=.16, size=4) +
    geom_text_repel(aes(x=change.trade.liberal,y=fit.trade, label=leader_country_label1), nudge_x=.12,  
                    force = 0.5, direction = "y", hjust = 0, segment.size = 0.1)  +
    scale_y_continuous("Probability of Trade Liberalization") + 
    scale_x_discrete("Observed Trade Liberalization", labels=c("Did Not Liberalize","Liberalized")) +
    theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
  ggsave("trade2.pdf")
  
    # Correctly classified
    trade.classify <-fit.trade2 %>% select(change.trade.liberal,fit.trade) %>% na.omit()
    trade.classify$correct[(trade.classify$fit.trade>mean(trade.classify$change.trade.liberal) & trade.classify$change.trade.liberal==1) | (trade.classify$fit.trade<mean(trade.classify$change.trade.liberal) & trade.classify$change.trade.liberal==0)]<-1
    trade.classify$correct[(trade.classify$fit.trade>mean(trade.classify$change.trade.liberal) & trade.classify$change.trade.liberal==0) | (trade.classify$fit.trade<mean(trade.classify$change.trade.liberal) & trade.classify$change.trade.liberal==1)]<-0
    summary(trade.classify$correct)
    table(trade.classify$correct)
    
  
  # tertiary education and rule of law
  fit.rl<-cbind(SoT_data, fit.rl=fitted(rl1), resid.rl=resid(rl1))
  fit.rl<-select(fit.rl, obsid, name, idacr, leader, university, uni.country, uni.type, fit.rl, resid.rl, 
                   change.LJI, AngloAmer.edu, euro.edu, noneuro.edu, rgdppc, oil, xconstV, UK_col, us_ally, EthFrac, pop, tenure)
  fit.rl <- fit.rl %>% arrange(desc(change.LJI))  %>% 
      mutate(leader_country=paste(leader, idacr, sep=", "))

  # Figure A.3(a): Model Fit and Outliers for Estimates of Rule of Law (Education Model)
  fit.rl %>% mutate(leader_country=paste(leader, idacr, sep=", "), 
                      leader_country_label1=ifelse((fit.rl>.0875) | (fit.rl<(-.048)), leader_country,"")) %>%
      select(change.LJI,fit.rl,leader_country_label1) %>%
      na.omit() %>%
      ggplot(aes(x=change.LJI, y=fit.rl)) +  
      geom_point() +
      geom_text_repel(aes(x=change.LJI, y=fit.rl, label=leader_country_label1), nudge_x=.02,  
                      force = 0.5, direction = "y", hjust = 0, segment.size = 0.1)  +
      geom_smooth(method="lm", aes(x=change.LJI, y=fit.rl),se=FALSE,colour="black",linetype=1) +
      geom_hline(aes(yintercept=0), linetype=2,size=.5) +
      geom_vline(aes(xintercept=0), linetype=2,size=.5) +    
      scale_y_continuous("Predicted Change in Rule of Law",limits = c(-0.15,.15)) + 
      scale_x_continuous("Observed Change in Rule of Law") +
      theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
  ggsave("rl1.pdf")
    
  # specialization and rule of law
  fit.rl2<-cbind(SoT_data, fit.rl=fitted(rl2), resid.rl=resid(rl2))
  fit.rl2<-select(fit.rl2, obsid, name, idacr, leader, university, uni.country, uni.type, fit.rl, resid.rl, 
                    change.LJI, AA.econ, AA.law, AA.socsci, AA.hum, AA.other, noAA.econ, noAA.law, noAA.socsci, noAA.hum, noAA.other, AAmil.edu, noAAmil.edu, rgdppc, oil, xconstV, UK_col, us_ally, EthFrac, pop, tenure)
  fit.rl2 <- fit.rl2 %>% arrange(desc(-resid.rl))  %>% 
      mutate(leader_country=paste(leader, idacr, sep=", "))

  # Figure A.3(b): Model Fit and Outliers for Estimates of Rule of Law (Subject Studied)
  fit.rl2 %>% mutate(leader_country=paste(leader, idacr, sep=", "), 
                       leader_country_label1=ifelse((fit.rl>.095) | (fit.rl<(-.06)), leader_country,"")) %>%
      select(change.LJI,fit.rl,leader_country_label1) %>%
      na.omit() %>%
      ggplot(aes(x=change.LJI, y=fit.rl)) +  
      geom_point() +
      geom_text_repel(aes(x=change.LJI, y=fit.rl, label=leader_country_label1), nudge_x=.02,  
                      force = 0.5, direction = "y", hjust = 0, segment.size = 0.1)  +
      geom_smooth(method="lm", aes(x=change.LJI, y=fit.rl),se=FALSE,colour="black",linetype=1) +
      geom_hline(aes(yintercept=0), linetype=2,size=.5) +
      geom_vline(aes(xintercept=0), linetype=2,size=.5) +    
      scale_y_continuous("Predicted Change in Rule of Law",limits = c(-0.15,.15)) + 
      scale_x_continuous("Observed Change in Rule of Law") +
      theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
  ggsave("rl2.pdf")  
  
  # tertiary education and financial openness
  fit.fo<-cbind(SoT_data, fit.fo=fitted(ka1), resid.fo=resid(ka1))
  fit.fo<-select(fit.fo, obsid, name, idacr, leader, university, uni.country, uni.type, fit.fo, resid.fo, 
                 change.kaopen, AngloAmer.edu, euro.edu, noneuro.edu, rgdppc, oil, xconstV, UK_col, us_ally, EthFrac, pop, tenure)
  fit.fo <- fit.fo %>% arrange(desc(-resid.fo))  %>% 
    mutate(leader_country=paste(leader, idacr, sep=", "))

  # Figure A.4(a): Model Fit and Outliers for Estimates of Financial Openness (Education Model)
  fit.fo %>% mutate(leader_country=paste(leader, idacr, sep=", "), 
                    leader_country_label1=ifelse((fit.fo>.95) | (fit.fo<(-.75)), leader_country,"")) %>%
    select(change.kaopen,fit.fo,leader_country_label1) %>%
    na.omit() %>%
    ggplot(aes(x=change.kaopen, y=fit.fo)) +  
    geom_point() +
    geom_text_repel(aes(x=change.kaopen, y=fit.fo, label=leader_country_label1), nudge_x=.25,  
                    force = 0.5, direction = "y", hjust = 0, segment.size = 0.1)  +
    geom_smooth(method="lm", aes(x=change.kaopen, y=fit.fo),se=FALSE,colour="black",linetype=1) +
    geom_hline(aes(yintercept=0), linetype=2,size=.5) +
    geom_vline(aes(xintercept=0), linetype=2,size=.5) +    
    scale_y_continuous("Predicted Change in Financial Openness") + 
    scale_x_continuous("Observed Change in Financial Openness") +
    theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
  ggsave("fo1.pdf")
  
  # specialization and financial openness
  fit.fo2<-cbind(SoT_data, fit.fo=fitted(ka2), resid.fo=resid(ka2))
  fit.fo2<-select(fit.fo2, obsid, name, idacr, leader, university, uni.country, uni.type, fit.fo, resid.fo, 
                  change.kaopen, AA.econ, AA.law, AA.socsci, AA.hum, AA.other, noAA.econ, noAA.law, noAA.socsci, noAA.hum, noAA.other, rgdppc, oil, xconstV, UK_col, us_ally, EthFrac, pop, tenure)
  fit.fo2 <- fit.fo2 %>% arrange(desc(-resid.fo))  %>% 
    mutate(leader_country=paste(leader, idacr, sep=", "))

  # Figure A.4(b): Model Fit and Outliers for Estimates of Financial Openness (Subject Studied)
  fit.fo2 %>% mutate(leader_country=paste(leader, idacr, sep=", "), 
                     leader_country_label1=ifelse((fit.fo>.95) | (fit.fo<(-.75)), leader_country,"")) %>%
    select(change.kaopen,fit.fo,leader_country_label1) %>%
    na.omit() %>%
    ggplot(aes(x=change.kaopen, y=fit.fo)) +  
    geom_point() +
    geom_text_repel(aes(x=change.kaopen, y=fit.fo, label=leader_country_label1), nudge_x=.25,  
                    force = 0.5, direction = "y", hjust = 0, segment.size = 0.1)  +
    geom_smooth(method="lm", aes(x=change.kaopen, y=fit.fo),se=FALSE,colour="black",linetype=1) +
    geom_hline(aes(yintercept=0), linetype=2,size=.5) +
    geom_vline(aes(xintercept=0), linetype=2,size=.5) +    
    scale_y_continuous("Predicted Change in Financial Openness") + 
    scale_x_continuous("Observed Change in Financial Openness") +
    theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
  ggsave("fo2.pdf")  
  
  
  # tertiary education and human rights
  fit.hr<-cbind(SoT_data, fit.hr=fitted(hr1), resid.fo=resid(hr1))
  fit.hr<-select(fit.hr, obsid, name, idacr, leader, university, uni.country, uni.type, fit.hr, resid.fo, 
                 change.hrscore, AngloAmer.edu, euro.edu, noneuro.edu, rgdppc, oil, xconstV, UK_col, us_ally, EthFrac, pop, tenure)
  fit.hr <- fit.hr %>% arrange(desc(-resid.fo))  %>% 
    mutate(leader_country=paste(leader, idacr, sep=", "))

  # Figure A.5(a): Model Fit and Outliers for Estimates of Human Rights (Education Model)
  fit.hr %>% mutate(leader_country=paste(leader, idacr, sep=", "), 
                    leader_country_label1=ifelse((fit.hr>.75) | (fit.hr<(-.65)), leader_country,"")) %>%
    select(change.hrscore,fit.hr,leader_country_label1) %>%
    na.omit() %>%
    ggplot(aes(x=change.hrscore, y=fit.hr)) +  
    geom_point() +
    geom_text_repel(aes(x=change.hrscore, y=fit.hr, label=leader_country_label1), nudge_x=.25,  
                    force = 0.5, direction = "y", hjust = 0, segment.size = 0.1)  +
    geom_smooth(method="lm", aes(x=change.hrscore, y=fit.hr),se=FALSE,colour="black",linetype=1) +
    geom_hline(aes(yintercept=0), linetype=2,size=.5) +
    geom_vline(aes(xintercept=0), linetype=2,size=.5) +    
    scale_y_continuous("Predicted Change in Human Rights Protections") + 
    scale_x_continuous("Observed Change in Human Rights Protections") +
    theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
  ggsave("hr1.pdf")
  
  # specialization and human rights
  fit.hr2<-cbind(SoT_data, fit.hr=fitted(hr2), resid.fo=resid(hr2))
  fit.hr2<-select(fit.hr2, obsid, name, idacr, leader, university, uni.country, uni.type, fit.hr, resid.fo, 
                  change.hrscore, AA.econ, AA.law, AA.socsci, AA.hum, AA.other, noAA.econ, noAA.law, noAA.socsci, noAA.hum, noAA.other, rgdppc, oil, xconstV, UK_col, us_ally, EthFrac, pop, tenure)
  fit.hr2 <- fit.hr2 %>% arrange(desc(-resid.fo))  %>% 
    mutate(leader_country=paste(leader, idacr, sep=", "))

  # Figure A.5(b): Model Fit and Outliers for Estimates of Human Rights  (Subject Studied)
  fit.hr2 %>% mutate(leader_country=paste(leader, idacr, sep=", "), 
                     leader_country_label1=ifelse((fit.hr>.75) | (fit.hr<(-.65)), leader_country,"")) %>%
    select(change.hrscore,fit.hr,leader_country_label1) %>%
    na.omit() %>%
    ggplot(aes(x=change.hrscore, y=fit.hr)) +  
    geom_point() +
    geom_text_repel(aes(x=change.hrscore, y=fit.hr, label=leader_country_label1), nudge_x=.25,  
                    force = 0.5, direction = "y", hjust = 0, segment.size = 0.1)  +
    geom_smooth(method="lm", aes(x=change.hrscore, y=fit.hr),se=FALSE,colour="black",linetype=1) +
    geom_hline(aes(yintercept=0), linetype=2,size=.5) +
    geom_vline(aes(xintercept=0), linetype=2,size=.5) +    
    scale_y_continuous("Predicted Change in Human Rights Protections") + 
    scale_x_continuous("Observed Change in Human Rights Protections") +
    theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
  ggsave("hr2.pdf")
  
  
  # tertiary education and liberal democracy
  fit.ld<-cbind(SoT_data, fit.ld=fitted(ld1), resid.fo=resid(ld1))
  fit.ld<-select(fit.ld, obsid, name, idacr, leader, university, uni.country, uni.type, fit.ld, resid.fo, 
                 change.libdem, AngloAmer.edu, euro.edu, noneuro.edu, rgdppc, oil, xconstV, UK_col, us_ally, EthFrac, pop, tenure)
  fit.ld <- fit.ld %>% arrange(desc(-resid.fo))  %>% 
    mutate(leader_country=paste(leader, idacr, sep=", "))

  # Figure A.6(a): Model Fit and Outliers for Estimates of Liberal Democracy (Education Model)
  fit.ld %>% mutate(leader_country=paste(leader, idacr, sep=", "), 
                    leader_country_label1=ifelse((fit.ld>.1175) | (fit.ld<(-.08)), leader_country,"")) %>%
    select(change.libdem,fit.ld,leader_country_label1) %>%
    na.omit() %>%
    ggplot(aes(x=change.libdem, y=fit.ld)) +  
    geom_point() +
    geom_text_repel(aes(x=change.libdem, y=fit.ld, label=leader_country_label1), nudge_x=.03,  
                    force = 0.5, direction = "y", hjust = 0, segment.size = 0.1)  +
    geom_smooth(method="lm", aes(x=change.libdem, y=fit.ld),se=FALSE,colour="black",linetype=1) +
    geom_hline(aes(yintercept=0), linetype=2,size=.5) +
    geom_vline(aes(xintercept=0), linetype=2,size=.5) +    
    scale_y_continuous("Predicted Change in Liberal Democracy") + 
    scale_x_continuous("Observed Change in Liberal Democracy") +
    theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
  ggsave("ld1.pdf")
  
  # specialization and liberal democracy
  fit.ld2<-cbind(SoT_data, fit.ld=fitted(ld2), resid.fo=resid(ld2))
  fit.ld2<-select(fit.ld2, obsid, name, idacr, leader, university, uni.country, uni.type, fit.ld, resid.fo, 
                  change.libdem, AA.econ, AA.law, AA.socsci, AA.hum, AA.other, noAA.econ, noAA.law, noAA.socsci, noAA.hum, noAA.other, rgdppc, oil, xconstV, UK_col, us_ally, EthFrac, pop, tenure)
  fit.ld2 <- fit.ld2 %>% arrange(desc(-resid.fo))  %>% 
    mutate(leader_country=paste(leader, idacr, sep=", "))

  # Figure A.6(b): Model Fit and Outliers for Estimates of Liberal Democracy (Subject Studied)
  fit.ld2 %>% mutate(leader_country=paste(leader, idacr, sep=", "), 
                     leader_country_label1=ifelse((fit.ld>.11) | (fit.ld<(-.1)) | (fit.ld<(-.06) & change.libdem>.15), leader_country,"")) %>%
    select(change.libdem,fit.ld,leader_country_label1) %>%
    na.omit() %>%
    ggplot(aes(x=change.libdem, y=fit.ld)) +  
    geom_point() +
    geom_text_repel(aes(x=change.libdem, y=fit.ld, label=leader_country_label1), nudge_x=.03,  
                    force = 0.5, direction = "y", hjust = 0, segment.size = 0.1)  +
    geom_smooth(method="lm", aes(x=change.libdem, y=fit.ld),se=FALSE,colour="black",linetype=1) +
    geom_hline(aes(yintercept=0), linetype=2,size=.5) +
    geom_vline(aes(xintercept=0), linetype=2,size=.5) +    
    scale_y_continuous("Predicted Change in Liberal Democracy") + 
    scale_x_continuous("Observed Change in Liberal Democracy") +
    theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 
  ggsave("ld2.pdf")
  
  